# -*-coding:utf-8 -*-

from bilibili.items import BilibiliItem
import scrapy


class BiliSpider(scrapy.Spider):
    """Spider for the Bilibili "popular / rank / all" listing page.

    Each ``<li>`` under the ``<ul class="rank-list">`` element is turned
    into one ``BilibiliItem`` carrying a ``title`` and a ``score`` field.
    """

    # Spider name.
    name = 'bili'
    # Domain the spider is limited to.
    allowed_domains = ['bilibili.com']
    # Page address crawled by default.
    start_urls = ['https://www.bilibili.com/v/popular/rank/all']

    def parse(self, response):
        """Extract one item per ranking entry found in ``response``.

        Args:
            response: The response for the ranking page; only its
                ``xpath`` method is used here.

        Returns:
            A list of ``BilibiliItem`` objects, one per ``<li>`` entry, in
            page order. A field is ``None`` when its XPath matches nothing.
        """
        # XPaths are relative to a single <li> entry. For reference, the
        # absolute form of the title path is:
        #   //ul[@class='rank-list']/li/div[2]/div[2]/a/text()
        title_path = "./div[2]/div[2]/a/text()"
        score_path = "./div[2]/div[2]/div[2]/div/text()"

        def build_item(entry):
            # extract_first() is needed to obtain the first matched text
            # value as a string; without it the result is a selector list
            # rather than plain text.
            entry_title = entry.xpath(title_path).extract_first()
            entry_score = entry.xpath(score_path).extract_first()
            # Populate a fresh item object for this entry.
            record = BilibiliItem()
            record['title'] = entry_title
            record['score'] = entry_score
            return record

        entries = response.xpath("//ul[@class='rank-list']/li")
        # The returned items are handed back to Scrapy (the original note
        # says they go on to the item pipeline).
        return [build_item(entry) for entry in entries]
